# This file is part of the replication packet for "A Low-Cost Information Nudge Increases Citizenship Application Rates Among Low-Income Immigrants"
#
# It does not replicate the map in the paper, but provides code for creating a similar map. It inputs geocoded participant data that includes
# two groups of participants and inputs a map of NYC. It codes the locations of the participants on the map. 
# Input: 
#     exampleGeoData.csv : a csv file that has columns labeled lonfinal and latfinal for all participants
#     nybb.shp : new york city shape files. These can be downloaded at NYC government web site 
#                (https://www1.nyc.gov/site/planning/data-maps/open-data/districts-download-metadata.page)
#                Borough Boundaries (Clipped to Shoreline)	
# Output:
#     signup_nyc.pdf : a pdf file that is a map of NYC with geocoded participant locations
#
# The code in this script is based on code written by Hans Leuder. 



# loading packages --------------------------------------------------------

rm(list=ls())
library(ggmap)
library(geosphere)
library(maptools)
library(rgeos)
library(scales)
library(maptools)
library(rgdal)
library(sp)
library(grid)
library(gridExtra)
library(geosphere)
library(ggplot2)


# setting data location ---------------------------------------------------

### need to change data location

# input the location for the geo-coded data and the output for the map figure
data_location <- ""
output_location <- ""


# reading in the data -----------------------------------------------------


# read in the geocoded data. This file should have columns named "lonfinal" and "latfinal"
setwd(data_location)
d <- read.csv("exampleGeoData.csv", stringsAsFactors = FALSE)

# reading in the NYC shape file
setwd(paste(data_location,"/New York City/",sep=""))
shapefile.nyc <- rgdal::readOGR("nybb.shp")

# adjusting the shape file and participant dataframe -------------------------------------------

# dropping observations that are NA
d <- d[!is.na(d$lonfinal),]

# Transform the coordinates s.t. we can use them for the New York City shape file

# extract coordinates from participant data dataframe and assign to class coordinates
coord.address <- data.frame(lon=d$lonfinal, lat=d$latfinal)
coordinates(coord.address) <- c("lon", "lat")

# assign the Coordinate Reference System (CRS) code in which the coordinates are written
proj4string(coord.address) <- CRS("+init=EPSG:4326")

# transform the NYC shape file to the 4326 CRS
shapefile.nyc <- spTransform(shapefile.nyc,"+init=EPSG:4326")


# turn both into data frames to be used with ggplot
shapefile.nyc.frame <- fortify(shapefile.nyc, region="BoroName")
coord.address <- as.data.frame(coord.address)

# coding whether someone recieved a fee waiver into a factor
coord.address$anyfeewaivernote <- factor(d$anyfeewaivernote,levels = unique(d$anyfeewaivernote) ,labels = c("Received a Referral-Only \nNotice",
                                                                                                            "Received a Fee \nWaiver Notice")[2:1])


# remove all points that are not within the NYC shape file. Otherwise, does not project.
coord.address$lon.nyc.new <- ifelse(coord.address$lon >= min(shapefile.nyc.frame$long, na.rm=T) & 
                                      coord.address$lon <= max(shapefile.nyc.frame$long, na.rm=T), coord.address$lon, NA)
coord.address$lat.nyc.new <- ifelse(coord.address$lat >= min(shapefile.nyc.frame$lat, na.rm=T) & 
                                      coord.address$lat <= max(shapefile.nyc.frame$lat, na.rm=T), coord.address$lat, NA)



# plotting the location of respondents  ----------------------------

# ggplot includes axis labels and jittering to prevent precise location from being displayed
ggplot(data=coord.address, aes(x=lon.nyc.new, y=lat.nyc.new)) +
  geom_polygon(data=shapefile.nyc.frame, aes(x=long, y=lat, group=group), fill="gray85")  + 
  geom_jitter(data=coord.address, aes(x=lon.nyc.new, y=lat.nyc.new, 
                                      col=anyfeewaivernote, shape=anyfeewaivernote),alpha=6/10,size=1) +
  theme_bw() +  theme(axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) +
  theme(plot.title = element_text(size=16, face="bold", colour="black",
                                  hjust=.5)) + 
  scale_colour_manual(values=c("indianred1", "cornflowerblue"),
                      name="") +
  theme(legend.title=element_text(face="bold", size=11),
        legend.text = element_text(size = 10), legend.position="bottom") + 
  scale_shape_manual(values=c(19,19),
                     name="",guide=FALSE) + 
  scale_fill_discrete(name = "Fee Waiver Recipients", label) +
  scalebar(data=shapefile.nyc.frame, dist=5, location="bottomright", st.size=3, 
           height=0.01, model = "WGS84", dd2km=TRUE)

setwd(output_location)
ggsave("signup_nyc.pdf", height = 6, width=6)



